package com.dataclean.lk

import org.apache.spark.sql.SparkSession

/**
 * Spark batch job that copies `customer_lk` from the `ods` database into
 * `dwd.customer_lk`, removing fully duplicated rows along the way.
 */
object CustomerClean {

  /**
   * Job entry point.
   *
   * Builds a Hive-enabled [[SparkSession]], switches the current database to
   * `ods`, registers the contents of `customer_lk` as the temporary view
   * `customer_lk_temp`, and inserts the distinct rows of that view into
   * `dwd.customer_lk`. The session is stopped when the statements finish or fail.
   *
   * Note: the load uses `insert into`, which appends to the target table, so
   * running the job again adds the same rows a second time.
   *
   * @param args command-line arguments; not used by this job
   */
  def main(args: Array[String]): Unit = {

    val spark = SparkSession.builder()
      .appName("CustomerClean")
      .config("spark.sql.warehouse.dir", "hdfs://master:9000/user/hive/warehouse")
      .enableHiveSupport()
      .getOrCreate()

    try {
      // Unqualified table names below resolve against the ods database.
      spark.sql("use ods")

      // Expose the ods source table as a temporary view.
      spark.sql(
        """
          |select
          |*
          |from
          |customer_lk
          |""".stripMargin).createOrReplaceTempView("customer_lk_temp")

      // De-duplicate the rows and store the result in the dwd table.
      spark.sql(
        """
          |insert into table dwd.customer_lk
          |select
          |distinct
          |CUSTKEY,
          |NAME,
          |ADDRESS,
          |NATIONKEY,
          |PHONE,
          |ACCTBAL,
          |MKTSEGMENT,
          |COMMENT
          |from
          |customer_lk_temp
          |""".stripMargin)
    } finally {
      // Always release the session, even when one of the statements throws.
      spark.stop()
    }

  }

}
